由于目前的计分应该是有问题的,暂时没进一步分析,预计后续的处理。7:验证不同问卷检出率是否有所不同,因此进行检出率分析。
# 载入必要的包
library(here)
## here() starts at D:/心理健康测量/MH_CPL/5_Analysis/5_1_Depression/5_1_2 Measurement data analysis
library(bruceR)
##
## bruceR (v2023.9)
## Broadly Useful Convenient and Efficient R functions
##
## Packages also loaded:
## ✔ data.table ✔ emmeans
## ✔ dplyr ✔ lmerTest
## ✔ tidyr ✔ effectsize
## ✔ stringr ✔ performance
## ✔ ggplot2 ✔ interactions
##
## Main functions of `bruceR`:
## cc() Describe() TTEST()
## add() Freq() MANOVA()
## .mean() Corr() EMMEANS()
## set.wd() Alpha() PROCESS()
## import() EFA() model_summary()
## print_table() CFA() lavaan_summary()
##
## For full functionality, please install all dependencies:
## install.packages("bruceR", dep=TRUE)
##
## Online documentation:
## https://psychbruce.github.io/bruceR
##
## To use this package in publications, please cite:
## Bao, H.-W.-S. (2023). bruceR: Broadly useful convenient and efficient R functions (Version 2023.9) [Computer software]. https://CRAN.R-project.org/package=bruceR
##
## NEWS: A new version of bruceR (2024.6) is available (2024-06-13)!
##
## ***** Please update *****
## install.packages("bruceR", dep=TRUE)
##
## These packages are dependencies of `bruceR` but not installed:
## - cowplot, ggtext, see, lmtest, vars, phia, BayesFactor, GPArotation
##
## ***** Install all dependencies *****
## install.packages("bruceR", dep=TRUE)
##
## 载入程序包:'bruceR'
## The following object is masked _by_ 'package:data.table':
##
## %notin%
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ data.table::between() masks dplyr::between()
## ✖ Matrix::expand() masks tidyr::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ data.table::first() masks dplyr::first()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ data.table::last() masks dplyr::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ Matrix::pack() masks tidyr::pack()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ lubridate::second() masks data.table::second()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ Matrix::unpack() masks tidyr::unpack()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openxlsx)
## Warning: 程序包'openxlsx'是用R版本4.4.1 来建造的
library(ggcorrplot)
## Warning: 程序包'ggcorrplot'是用R版本4.4.1 来建造的
#载入数据
raw_data <- bruceR::import(here::here("data", "Rawdata1.xlsx"))
## New names:
## • `量表ID` -> `量表ID...6`
## • `量表名称` -> `量表名称...7`
## • `量表ID` -> `量表ID...49`
## • `量表ID` -> `量表ID...69`
## • `量表ID` -> `量表ID...80`
## • `量表名称` -> `量表名称...81`
## • `量表ID` -> `量表ID...89`
## • `量表ID` -> `量表ID...98`
#筛选与抑郁有关的数据
selected_data <- raw_data %>%
select(就诊卡号, 性别, 年龄,
DSRSC1:DSRSC18,
PHQ1:PHQ9,
DASS3, DASS5, DASS10, DASS13, DASS16, DASS17, DASS21,
CDI1:CDI27)
#转换数据类型
selected_data_numeric <- selected_data %>%
mutate(性别 = factor(性别, levels = c("男", "女"), labels = c(1, 2))) %>%
mutate(across(matches("性别|年龄|DSRSC|PHQ|DASS|CDI"), as.numeric))
#处理缺失值
clean_data <- selected_data_numeric %>%
drop_na()
#原始数据-1,用以方便计算检出率
transformed_data <- clean_data %>%
mutate(across(matches("DSRSC|PHQ|DASS|CDI"), ~ .x - 1))
#描述统计
# 计算性别比例
gender_proportion <- clean_data %>%
summarise(
male = sum(性别 == 1, na.rm = TRUE),
female = sum(性别 == 2, na.rm = TRUE),
total = n(),
male_proportion = male / total * 100,
female_proportion = female / total * 100
)
# 打印性别比例
print(gender_proportion)
## male female total male_proportion female_proportion
## 1 6754 6078 12832 52.63404 47.36596
# 计算年龄的范围
age_range <- clean_data %>%
summarise(
min_age = min(年龄, na.rm = TRUE),
max_age = max(年龄, na.rm = TRUE)
)
# 打印年龄范围
print(age_range)
## min_age max_age
## 1 0 29
# 筛选出年龄在6岁以下的数据
under_6_data <- clean_data %>%
filter(年龄 < 6)
# 打印出年龄在6岁以下的数据
print(under_6_data)
## 就诊卡号 性别 年龄 DSRSC1 DSRSC2 DSRSC3 DSRSC4 DSRSC5 DSRSC6 DSRSC7 DSRSC8
## 1 0012712415 1 5 2 1 3 1 3 1 2 2
## 2 0012768596 1 2 2 2 1 1 3 2 3 2
## 3 0012808861 1 0 1 1 2 1 3 3 1 1
## 4 0012858882 2 0 2 3 2 2 3 2 2 2
## 5 1215857130 1 1 2 1 2 2 3 3 2 1
## 6 0012939870 1 5 1 2 3 1 2 3 1 2
## 7 0012989858 2 2 3 1 2 1 2 3 1 2
## 8 0009459552 1 5 1 1 2 1 3 2 1 1
## 9 0009491458 1 5 1 1 2 1 3 2 2 2
## 10 0012680982 2 0 1 1 2 1 3 2 1 1
## 11 0013030057 1 3 1 1 2 1 3 3 1 1
## 12 0011959389 2 5 1 1 3 1 3 2 2 2
## 13 0013497374 1 0 3 1 3 2 3 3 3 2
## 14 0008993491 2 0 3 3 1 1 3 1 1 2
## 15 0013593099 2 5 1 1 3 1 3 3 1 1
## 16 0013615049 1 2 2 2 3 2 3 3 1 2
## 17 0013784963 2 3 1 1 3 3 3 1 3 2
## 18 0013786955 2 5 1 2 3 1 3 1 1 2
## 19 0013939368 1 0 2 2 1 2 3 3 2 1
## 20 0013943658 1 5 2 1 3 1 3 3 1 1
## 21 0013981579 1 5 2 3 3 1 3 3 2 2
## DSRSC9 DSRSC10 DSRSC11 DSRSC12 DSRSC13 DSRSC14 DSRSC15 DSRSC16 DSRSC17
## 1 2 3 2 2 1 2 3 1 3
## 2 2 3 2 2 2 2 3 2 2
## 3 1 3 1 1 1 3 3 1 3
## 4 2 2 2 3 2 2 3 2 2
## 5 3 3 2 2 2 3 2 1 3
## 6 1 3 3 1 1 2 3 1 3
## 7 2 2 3 2 3 1 3 2 3
## 8 1 3 1 2 1 3 2 1 3
## 9 2 3 2 1 1 2 1 1 3
## 10 1 3 2 2 1 3 3 1 2
## 11 2 3 1 1 1 2 2 1 3
## 12 3 3 1 2 1 3 3 1 3
## 13 1 3 1 2 2 1 3 3 3
## 14 3 1 3 3 3 2 3 3 3
## 15 1 3 1 1 1 3 3 1 3
## 16 2 1 2 2 3 2 3 3 3
## 17 3 2 3 2 2 3 3 2 2
## 18 1 3 1 1 1 3 3 1 3
## 19 3 2 3 2 1 2 2 1 2
## 20 2 3 2 1 1 2 2 1 3
## 21 2 2 2 2 3 3 2 2 2
## DSRSC18 PHQ1 PHQ2 PHQ3 PHQ4 PHQ5 PHQ6 PHQ7 PHQ8 PHQ9 DASS3 DASS5 DASS10
## 1 2 1 2 1 1 4 1 4 1 1 1 3 1
## 2 2 4 3 3 2 1 2 4 2 2 2 2 2
## 3 2 1 1 4 1 1 1 1 1 1 1 1 1
## 4 1 2 2 4 4 1 2 1 4 2 4 4 4
## 5 3 4 2 4 1 4 3 4 4 1 3 4 2
## 6 2 4 2 4 1 3 3 3 1 1 3 1 1
## 7 1 4 2 4 4 4 4 4 4 4 4 4 4
## 8 2 2 2 4 1 1 1 2 3 1 1 4 1
## 9 3 1 4 4 1 1 1 4 2 1 2 3 1
## 10 2 1 2 1 1 1 1 1 1 1 1 1 1
## 11 3 2 2 2 1 1 2 1 2 1 1 1 1
## 12 3 1 2 4 2 2 1 1 1 1 1 1 1
## 13 3 1 1 4 1 4 3 1 1 1 2 2 2
## 14 3 2 2 4 4 2 1 1 1 4 1 1 1
## 15 3 2 1 2 2 2 1 1 1 1 2 1 1
## 16 2 1 1 1 1 1 1 1 1 1 1 2 1
## 17 2 4 2 1 4 4 4 3 2 2 2 4 4
## 18 3 1 2 3 2 4 1 1 2 1 1 2 1
## 19 1 2 2 4 2 2 2 1 4 2 1 4 3
## 20 2 1 2 2 1 2 1 1 4 1 2 3 1
## 21 2 2 2 3 2 4 1 2 3 2 2 4 2
## DASS13 DASS16 DASS17 DASS21 CDI1 CDI2 CDI3 CDI4 CDI5 CDI6 CDI7 CDI8 CDI9
## 1 1 2 1 1 3 2 3 2 1 1 3 3 1
## 2 3 2 2 1 3 1 2 2 2 3 2 2 2
## 3 1 1 1 1 1 2 2 1 1 3 3 3 1
## 4 2 2 1 2 1 1 3 2 2 1 2 3 2
## 5 2 3 1 1 2 2 3 2 1 1 2 2 2
## 6 1 1 1 1 1 2 2 1 1 1 3 3 2
## 7 2 4 1 4 1 1 3 2 1 3 2 2 2
## 8 2 1 1 1 2 2 2 2 1 1 3 3 1
## 9 1 1 1 1 1 2 2 2 1 2 3 3 1
## 10 1 2 1 1 1 2 2 2 3 1 3 2 1
## 11 1 1 1 1 2 3 2 1 1 1 3 3 1
## 12 2 1 1 1 1 3 1 2 1 1 3 3 1
## 13 1 1 1 1 1 1 1 2 3 2 3 3 1
## 14 1 1 1 2 1 2 2 2 3 1 2 1 2
## 15 1 1 1 1 1 2 2 1 3 1 3 3 1
## 16 1 1 1 1 1 2 2 2 3 1 3 3 1
## 17 1 3 4 4 1 1 3 2 3 1 2 2 2
## 18 1 1 1 1 1 2 2 1 3 1 3 3 1
## 19 2 1 2 2 1 1 3 2 1 1 2 1 2
## 20 1 2 1 1 1 2 1 1 3 1 3 3 1
## 21 3 3 1 2 3 1 2 2 2 2 2 3 2
## CDI10 CDI11 CDI12 CDI13 CDI14 CDI15 CDI16 CDI17 CDI18 CDI19 CDI20 CDI21
## 1 3 3 1 2 1 1 3 1 2 1 1 1
## 2 1 2 3 2 2 2 2 2 2 2 2 2
## 3 3 3 1 3 1 3 3 1 3 2 1 3
## 4 3 3 2 1 1 2 2 2 2 2 1 1
## 5 2 2 2 2 1 2 3 1 3 1 2 1
## 6 3 3 1 3 1 1 2 1 2 1 1 1
## 7 1 1 1 2 3 2 3 3 3 3 2 3
## 8 3 3 1 3 1 1 1 1 3 1 2 1
## 9 3 3 1 3 1 3 3 1 2 1 2 2
## 10 3 3 3 3 1 3 3 1 3 1 2 3
## 11 3 3 2 1 1 1 3 1 3 1 2 3
## 12 3 3 1 3 1 3 3 1 3 1 1 1
## 13 3 3 2 2 1 2 1 1 3 1 2 2
## 14 2 3 1 3 1 3 2 3 2 1 1 2
## 15 3 3 1 3 1 3 3 1 3 1 1 2
## 16 3 3 1 3 1 3 3 1 2 1 1 2
## 17 3 3 2 1 2 3 3 2 2 1 1 1
## 18 3 3 1 3 1 3 3 1 2 1 1 3
## 19 3 3 1 2 2 2 2 1 3 2 1 2
## 20 3 3 1 2 1 1 3 1 2 1 1 2
## 21 3 1 2 1 1 1 3 2 1 1 2 1
## CDI22 CDI23 CDI24 CDI25 CDI26 CDI27
## 1 2 1 2 3 2 2
## 2 2 3 1 2 1 2
## 3 1 2 2 3 2 1
## 4 3 3 1 1 2 3
## 5 2 3 1 3 2 3
## 6 1 1 3 3 1 1
## 7 1 1 2 3 2 1
## 8 2 1 3 3 2 2
## 9 2 1 3 3 2 3
## 10 1 2 2 3 1 1
## 11 1 1 3 3 1 1
## 12 2 1 2 3 2 2
## 13 2 2 1 3 2 2
## 14 2 1 1 3 2 2
## 15 2 1 3 3 1 1
## 16 1 2 2 2 1 1
## 17 2 3 1 3 2 1
## 18 1 1 3 3 1 1
## 19 2 3 2 3 2 3
## 20 1 1 1 3 2 2
## 21 2 2 2 2 3 2
#反向计分前的相关分析
correlation_matrix <- transformed_data %>%
select(matches("DSRSC|PHQ|DASS|CDI")) %>%
cor()
#导出
output_file <- here::here("output", "correlation_matrix.xlsx")
write.xlsx(correlation_matrix, output_file)
#可视化
p <- ggcorrplot(correlation_matrix, lab = TRUE,
method = "circle",
outline.color = "white",
colors = c("blue", "white", "red"),
ggtheme = ggplot2::theme_minimal(base_family = "sans"),
title = "未反向计分的相关系数") +
theme_minimal(base_family = "sans") +
theme(
text = element_text(size = 20, color = "black"),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 14),
axis.text.y = element_text(size = 14),
axis.title = element_text(size = 22),
plot.background = element_rect(fill = "gray90"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "gray90", color = NA),
legend.position = "bottom",
legend.key.width = unit(4, "cm"),
legend.key.height = unit(1, "cm")
) +
scale_size_continuous(range = c(5, 12)) +
labs(x = "", y = "")
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
#保存图片
ggsave(here::here("output", "correlation_heatmap.png"),
plot = p, width = 32, height = 28, dpi = 300)